% Pixel-BERT (Huang et al., 2020): arXiv preprint, identifier 2004.00849.
% Stored as a misc entry with eprint fields because no journal is recorded.
@misc{huang2020pixel,
  author        = {Huang, Zhicheng and Zeng, Zhaoyang and Liu, Bei and Fu, Dongmei and Fu, Jianlong},
  title         = {{Pixel-BERT}: Aligning Image Pixels with Text by Deep Multi-Modal Transformers},
  year          = {2020},
  eprint        = {2004.00849},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2004.00849},
}